## This file generates the QOI for the WSJ article

setwd("c:/users/stephen/hockey research/other sports/ncaa brackets")

load("all brackets.RData")

## Clean up the final four and finals

brackets$ff <- rowSums(cbind((brackets$ff1 == "florida") * 80,
                             (brackets$ff2 == "uconn") * 80,
                             (brackets$ff3 == "wisconsin") * 80,
                             (brackets$ff4 == "kentucky") * 80))
brackets$finals <- rowSums(cbind((brackets$finalist1 == "uconn") * 160,
                                 (brackets$finalist2 == "kentucky") * 160))

nearperfect <- matrix(NA, nrow = 5, ncol = 3)
colnames(nearperfect) <- c("All teams", "All but one", "All but two")

# Get the percent of brackets that were perfect at each round

nearperfect[1:5,1] <- c(sum(brackets$round1 == 320),
                        sum(brackets$round2 == 320),
                        sum(brackets$sweet16 == 320),
                        612, 
                        1780)# as reported by ESPN.com
                        #sum(brackets$ff == 320))#,
                        #sum(brackets$finals == 320))

# Get the percent of brackets that were 1 away from perfect at each round
nearperfect[1:5,2] <- c(sum(brackets$round1 == 310),
                        sum(brackets$round2 == 300),
                        sum(brackets$sweet16 == 280),
                        51045, # as reported by ESPN.com
                        #sum(brackets$ff == 240))#,
                        sum(brackets$finals == 160))


# Get the percent of brackets that were 1 away from perfect at each round
nearperfect[1:5,3] <- c(sum(brackets$round1 == 300),
                        sum(brackets$round2 == 280),
                        sum(brackets$sweet16 == 240),
                        sum(brackets$ff == 160),
                        sum(brackets$finals == 0))

nearperfect <- apply(nearperfect, 2, function(x) paste0(x, " (", round(x/nrow(brackets) * 100, 2), "%)"))
rownames(nearperfect) <- c("Round of 32", "Sweet 16", "Elite 8", "Final Four", "Championship")

write.csv(nearperfect, file = "nearperfect.csv")

# Get the table of predicted national champions
#sort(table(brackets$champ))


# Get total number of games correct

brackets$correct <- rowSums(cbind(brackets$round1 / 10,
                                  brackets$round2 / 20,
                                  brackets$sweet16 / 40,
                                  brackets$ff / 80,
                                  brackets$finals / 160))

#table(brackets$correct)


summary(brackets$correct)

# The MLE of p in a binomial is just sum(y) / length(y) / #trials
p <- sum(brackets$correct) / nrow(brackets) / 62




# So the probability of getting all 63 correct is:
allcorrect <- dbinom(63, size = 63, prob = p)
allcorrect
1/allcorrect
# 1 in 1.47 quadrillian

# With 12 million brackets, the probability of somebody getting it perfect is:
1 - (1-allcorrect) ^ 11000000
1 / (1 - (1-allcorrect) ^ 11000000)
# 1 in 136 million

# How many brackets to have a better than 1% chance of 1 perfect bracket:
1 - (1-allcorrect) ^ 15087562940000
15087562940000/317000000
# 15.1 trillion brackets

# How many brackets to have a better than 50% chance of 1 perfect bracket:
1 - (1-allcorrect) ^ 1040552461500000
# 1.04 quadrillon brackets
1040552461500000 / 317000000
